############################################################################
# RE main function
#
# - dt at the i-level
# - dt_ext at tje (i,t)-level
# - dt_sim is simulated data at the i level (across b values)


############################################################################

RE_func = function(data,data_ext=0,quiet=TRUE,min0=NA,max0=NA,min0_mg=NA,max0_mg=NA,model="cte",Nsim=4000) {

  dt = copy(data)
  dt_ext = copy(data_ext)
  
  # Prepare data for simulated densities and CDF
  min_val=min(data$coef,min0,na.rm=TRUE)
  max_val=max(data$coef,max0,na.rm=TRUE)
  dt_sim = data.table(id=seq(1,Nsim),x=seq(min_val,max_val,length.out=Nsim))
  summary(dt_sim)
  
  ############################################################################
  # Prior 
  ############################################################################
  
  prior= prior_beta(data=dt,data_sim=dt_sim,model=model,w_name="w",quiet=quiet) 
  dt = copy(prior$dt)
  dt_sim = copy(prior$dt_sim)
  
  ############################################################################
  # Signal 
  ############################################################################
  
  var_noise = wtd.mean(dt$se2,weights=dt$w)
  var_noise
  var_FE = wtd.var(dt$coef,weights=dt$w)
  var_FE
  mean_FE = wtd.mean(dt$coef,weights=dt$w)
  mean_FE
  
  # CDF of normal fit to FE
  dt_sim[,FE_approx_cdf:=pnorm(x,mean=mean_FE,sd=sqrt(var_FE))]
  
  # Normal fit to FE 
  dt_sim[,FE_approx_den:=dnorm(x,mean=mean_FE,sd=sqrt(var_FE))]
  
  # CDF of FE (weighted)
  cdf_coef = dt[,.SD,.SD=c("coef","w","id")]
  cdf_coef = cdf_coef[order(cdf_coef$coef),]
  cdf_coef[,cdf_FE := cumsum(w)/sum(cdf_coef$w)]
  dt = merge(dt,cdf_coef[,.SD,.SDcols=c("id","cdf_FE")],by="id",all=TRUE)
  
  dim(dt)
  summary(dt)

  ############################################################################
  # Posterior distribution
  ############################################################################
  
  # Posterior means
  dt[,rho:=var0/(var0+se2)]
  dt[,mu1:=mu0*(1-rho) + rho*coef]
  summary(dt)
  
  # Posterior variances
  dt[,var1:=var0*(1-rho)]
  summary(dt)
  
  # Posterior density
  for (k in seq(1,Nsim)) {
    dt[,pdf_var:=dnorm((dt_sim[id==k,x]-mu1)/sqrt(var1))/sqrt(var1)]
    dt_sim[id==k,post_den:=wtd.mean(dt$pdf_var,weights=dt$w)]
  }
  summary(dt_sim)
  dt[,pdf_var:=NULL]
  
  # Posterior cdf
  for (k in seq(1,Nsim)) {
    dt[,cdf_var:=pnorm((dt_sim[id==k,x]-mu1)/sqrt(var1))]
    dt_sim[id==k,post_cdf:=wtd.mean(dt$cdf_var,weights=dt$w)]
  }
  summary(dt_sim)
  dt[,cdf_var:=NULL]
  
  ############################################################################
  # Characterization of the posterior distribution (i-level)
  ############################################################################

  # Columns overall distribution per se
  q = c(0.1,0.25,0.5,0.75,0.9)
  dt[,ones:=1]
  cdf1= corrected_cdf(data=dt,w_name="w",Nsim=Nsim+1,min_val=min_val, max_val=max_val,quiet=quiet)
  quan1 =corrected_quantiles(mat_cdf=cdf1,q=q)
  v =c(quan1,corrected_mean(dt,w_name="w"),corrected_var(dt,w_name="w"),nrow(dt))

  # Sort columns
  s = as.matrix(v,ncol=1)
  rownames(s)=c(paste0("Percentile ",c(10,25,50,75,90)),"Mean","Variance","N")

  ############################################################################
  # Marginal effects (i,t) level
  ############################################################################
  
  dt_mg_sim=NA
  s_mg=NA

  if (length(data_ext)>1) {
    
    #### Paste the mu1 and var1 of coefficients to data_ext 
    dt_ext = merge(dt_ext,dt[,.(id,coef,mu0,var0,mu1,var1)],by="id",all=TRUE)
    dt_ext[, mg:=coef]
    summary(dt_ext)
    
    #### Generate data for simulated distribution
    min_mg=min(dt_ext$mg,dt_ext$mu0,min0_mg,na.rm=TRUE)
    max_mg=max(dt_ext$mg,dt_ext$mu0,max0_mg,na.rm=TRUE)
    dt_mg_sim = data.table(id=seq(1,Nsim),x=seq(min_mg,max_mg,length.out=Nsim))
    summary(dt_mg_sim)
    
    #### Prior distribution
    for (k in seq(1,Nsim)) {
      dt_ext[,pdf_var:=dnorm((dt_mg_sim[id==k,x]-mu0)/sqrt(var0))/sqrt(var0)]
      dt_mg_sim[id==k,prior_den:=wtd.mean(dt_ext$pdf_var,weights=dt_ext$w)]
    }
    summary(dt_mg_sim)
    dt_ext[,pdf_var:=NULL]

    #### Prior cdf
    for (k in seq(1,Nsim)) {
      dt_ext[,cdf_var:=pnorm((dt_mg_sim[id==k,x]-mu0)/sqrt(var0))]
      dt_mg_sim[id==k,prior_cdf:=wtd.mean(dt_ext$cdf_var,weights=dt_ext$w)]
    }
    summary(dt_mg_sim)
    dt_ext[,cdf_var:=NULL]
    
    #### Posterior distribution
    for (k in seq(1,Nsim)) {
      dt_ext[,pdf_var:=dnorm((dt_mg_sim[id==k,x]-mu1)/sqrt(var1))/sqrt(var1)]
      dt_mg_sim[id==k,post_den:=wtd.mean(dt_ext$pdf_var,weights=dt_ext$w)]
    }
    summary(dt_mg_sim)
    dt_ext[,pdf_var:=NULL]
    
    #### Posterior cdf
    for (k in seq(1,Nsim)) {
      dt_ext[,cdf_var:=pnorm((dt_mg_sim[id==k,x]-mu1)/sqrt(var1))]
      dt_mg_sim[id==k,post_cdf:=wtd.mean(dt_ext$cdf_var,weights=dt_ext$w)]
    }
    summary(dt_mg_sim)
    dt_ext[,cdf_var:=NULL]
    
    #### Characterize posterior distribution
    dt_ext[,ones:=1]
    cdf1_mg= corrected_cdf(data=dt_ext,w_name="w",Nsim=Nsim+1,min_val=min_mg, max_val=max_mg,quiet=quiet)
    quan1 =corrected_quantiles(mat_cdf=cdf1_mg,q=q)
    v =c(quan1,corrected_mean(dt_ext,w_name="w"),corrected_var(dt_ext,w_name="w"),nrow(dt_ext))
    s_mg = as.matrix(v,ncol=1)
    rownames(s_mg)=c(paste0("Percentile ",c(10,25,50,75,90)),"Mean","Variance","N")

  }
  
  
  ############################################################################
  # Return
  ############################################################################
  
  return(list(dt=dt,dt_sim=dt_sim,dt_ext=dt_ext,dt_mg_sim=dt_mg_sim,table_char=s,table_mg=s_mg))
  
}


###############################################################################
# Function that returns the posterior distribution CDF
# For beta coefficients
###############################################################################

corrected_cdf = function(data,w_name="ones",Nsim=2001,min_val=-50,max_val=140,quiet=FALSE) { 
  
  dt_slop = copy(data)
  
  # Get a sense of the range of the CDF
  if (quiet==FALSE) {
    aa=as.numeric(dt_slop[,lapply(.SD,mean),.SDcols="mu1"])
    bb=as.numeric(dt_slop[,lapply(.SD,mean),.SDcols="var1"])
    print(qnorm(p=c(seq(0.01,0.05,0.01),seq(0.95,0.99,0.01)), mean = aa, sd = sqrt(bb), lower.tail = TRUE, log.p = FALSE))
  }
  
  # Generate relative weight
  dt_slop[,w:=.SD,.SDcols=w_name]
  
  # Generate values for estimating the cdf, symmetric and odd so it includes 0
  bvals=seq(min_val,max_val,length.out=Nsim)
  mat_cdf =matrix(NA,nrow=Nsim,ncol=1)
  
  # Empirical posterior CDF (weighted) 
  dt_slop[,post_mean:=.SD,.SDcols="mu1"]
  dt_slop[,post_var:=.SD,.SDcols="var1"]
  for (k in seq(1,Nsim,1)) {
    dt_slop[,aux:=(bvals[k]-post_mean)/sqrt(post_var)]
    dt_slop[,cdf:=pnorm(aux)]
    mat_cdf[k,1]= as.matrix(dt_slop[,lapply(.SD,wtd.mean,weights=w),.SDcols="cdf"])
  }  
  
  mat_cdf=as.data.table(mat_cdf)
  mat_cdf[,b:=bvals]
  
  return(mat_cdf)
}
###############################################################################
# Function that returns quantiles of the posterior distribution
###############################################################################

corrected_quantiles = function(mat_cdf,q) {
  
  v = c()
  for (l in seq(1,length(q),1)) {
    mat_now = mat_cdf[V1<=q[l]]
    v = c(v,mat_now[nrow(mat_now),b])
  }
  return(v)
  
}

###############################################################################
# Function that returns the mean of the posterior distribution
###############################################################################

corrected_mean = function(data,w_name="ones") {
  dt_now=copy(data)
  dt_now[,w:=.SD,.SDcols=w_name]
  return(as.matrix(dt_now[,lapply(.SD,wtd.mean,weights=w,na.rm=TRUE),.SDcols="mu1"]))
}

############################################################################
# Function that returns the variance of the posterior distribution
############################################################################

# Corrected variance
corrected_var = function(data,w_name="ones") {
  
  dt_slop = copy(data)
  dt_slop[,w:=.SD,.SDcols=w_name]
  Sw = sum(dt_slop$w)
  dt_slop[,w_tilde:=w/Sw*(1-w/Sw)]  
  var_mu1 = wtd.var(dt_slop$mu1,weights=dt_slop$w,method="ML") 
  mean_var1 =wtd.mean(dt_slop$var1,weights=dt_slop$w_tilde)* sum(dt_slop$w_tilde) 
  var_FP = var_mu1 + mean_var1
  
  return(var_FP)
}

###############################################################################
# Function that estimates prior, given se and model for the prior
# for beta coefficients
###############################################################################

prior_beta = function(data,data_sim,model,w_name="ones",shrink=1,quiet=FALSE) {
  
  #
  dt_slopes = copy(data)
  dt_slopes[,z_sq:=z^2]
  dt_slopes[,ones:=1]
  dt_sim = copy(data_sim)
  
  ########################
  # Prior Mean
  ########################
  ww = dt_slopes[,.SD,.SDcols=w_name]
  setnames(ww,"w")
  ols_mean = lm(coef ~ z ,data=dt_slopes,weights=ww$w)
  dt_slopes[,mu0 := predict(ols_mean)]
  if (model=="cte") dt_slopes[,mu0 := wtd.mean(coef,weights=w)]
  
  ########################
  # Prior Variance
  ########################
  dt_slopes[,beta_tilde:=coef-mu0]
  dt_slopes[,yn:=beta_tilde^2-se^2]
  dt_slopes[,se2:=se^2]
  if (model=="cte") {
    dt_slopes[,var0:=wtd.var(coef,weights=w)-wtd.mean(se2,weights=w)*shrink]
  }
  if (model=="mean_w") {
    dt_slopes[,var0:=wtd.mean(yn,weights=w)]
    if (mean(dt_slopes$var0)<=0) {
      dt_slopes[,var0:=wtd.var(coef,weights=w)-wtd.mean(se2,weights=w)*shrink]
    }
  }
  if (model=="exp1") {
    start_value=c(g0=1,g1=0)
    fit = nls(formula="yn ~ exp(g0+z*g1)",start=start_value,data=dt_slopes,weights=w)
    dt_slopes[,var0:=predict(fit)]
    
  }
  if (model=="exp2") {
    start_value=c(g0=1,g1=0)
    fit = nls(formula="yn ~ exp(g0+log(z)*g1)",start=start_value,data=dt_slopes,weights=w)
    dt_slopes[,var0:=predict(fit)]
  }
  if (model=="exp3") {
    start_value=c(g0=1,g1=0)
    fit = nls(formula="yn ~ exp(g0+sd_z*g1)",start=start_value,data=dt_slopes,weights=w)
    dt_slopes[,var0:=predict(fit)]
  }
  if (model=="exp4") {
    start_value=c(g0=1,g1=0,g2=0)
    fit = nls(formula="yn ~ exp(g0+z*g1+sd_z*g2)",start=start_value,data=dt_slopes,weights=w)
    dt_slopes[,var0:=predict(fit)]
  }
  if (model=="bins") {
    
    # Classify counties according to mean temperature in 3 bins
    qqw = quantile(dt_slopes$z,probs=seq(0,1,length.out = 3+1))
    dt_slopes[,gw:=1]
    for (k in seq(1,3)) {
      dt_slopes[z>qqw[k],gw:=k]  
    }
    
    # Generate variance of FE slopes per group
    dt_slopes[,var_betaFE:=lapply(.SD,wtd.var,weights=w),by="gw",.SDcols="coef"]
    
    # Generate the average s.e. per group
    dt_slopes[,var_noise:=lapply(.SD,wtd.mean,weights=w),by="gw",.SDcols="se2"]
    
    # Prior variance
    dt_slopes[,var0:=var_betaFE - var_noise]
    
  }
  
  
  ########################
  # Prior density
  ########################
  
  for (k in seq(1,nrow(dt_sim))) {
    dt_slopes[,pdf_var:=dnorm((dt_sim[id==k,x]-mu0)/sqrt(var0))/sqrt(var0)]
    dt_sim[id==k,prior_den:=wtd.mean(dt_slopes$pdf_var,weights=dt_slopes$w)]
  }
  summary(dt_sim)
  dt_slopes[,pdf_var:=NULL]
  
  ########################
  # Prior cdf
  ########################
  
  for (k in seq(1,nrow(dt_sim))) {
    dt_slopes[,cdf_var:=pnorm((dt_sim[id==k,x]-mu0)/sqrt(var0))]
    dt_sim[id==k,prior_cdf:=wtd.mean(dt_slopes$cdf_var,weights=dt_slopes$w)]
  }
  summary(dt_sim)
  dt_slopes[,cdf_var:=NULL]
  
  
  ########################
  # Return
  ########################
  
  dt_slopes=dt_slopes[,.SD,.SDcols=c("id","z","sd_z","coef","se2","mu0","var0","w")]
  return(list(dt=dt_slopes,dt_sim=dt_sim))
  
}


############################################################################
# Function that plots per res
# Prior (green), FE (black), P (blue), PM (red)
############################################################################

re_plot = function(res,adj_fe=1,adj_mu1=1,adj_mg=1,xx=0,xx_mg=0) {
  
  ########################################
  # Coefficients
  ########################################
  
  g= ggplot(data=res$dt,aes(x=coef,weight=w))+ geom_density(adjust=adj_fe,color="black") + theme_bw() +
    geom_density(aes(x=mu1,weight=w),color="red",adjust=adj_mu1) +
    geom_line(data=res$dt_sim,aes(x=x,y=post_den,weight=NULL),color="blue") +
    geom_line(data=res$dt_sim,aes(x=x,y=prior_den,weight=NULL),linetype="dashed",color="green") +
    theme(title=element_text(size=14),plot.title=element_text(hjust=0.5),
          plot.subtitle=element_text(hjust=0.5), axis.text = element_text(size=18),
          axis.title = element_text(size=18),
          legend.position = 'right',legend.text = element_text(size=14)) +
    #ggtitle("FE (solid black) vs posterior (dashed blue)") +
    xlab("Coefficients") + ylab("Density") 
  if (length(xx)>1) {
    g = g + xlim(xx) 
  }
  
  g_coef = g
  
  ########################################
  # Marginal effects
  ########################################
  
  g= ggplot(data=res$dt_ext,aes(x=mg,weight=w))+ geom_density(adjust=adj_mg,color="black") + theme_bw() +
    geom_density(aes(x=mu1,weight=w),color="red",adjust=adj_mu1) +
    geom_line(data=res$dt_mg_sim,aes(x=x,y=post_den,weight=NULL),color="blue") +
    geom_line(data=res$dt_mg_sim,aes(x=x,y=prior_den,weight=NULL),linetype="dashed",color="green") +
    theme(title=element_text(size=14),plot.title=element_text(hjust=0.5),
          plot.subtitle=element_text(hjust=0.5), axis.text = element_text(size=18),
          axis.title = element_text(size=18),
          legend.position = 'right',legend.text = element_text(size=14)) +
    #ggtitle("FE (solid black) vs posterior (dashed blue)") +
    xlab("Marginal effects") + ylab("Density") 
  if (length(xx_mg)>1) {
    g = g + xlim(xx_mg) 
  }
  g_mg = g
  
  ########################################
  # Return
  ########################################
  
  return(list(g_coef=g_coef,g_mg=g_mg))
  
}

############################################################################
# Function that tabulate per res
# Prior, FE, P, PM
############################################################################

re_tab = function(res) {
  
  ########################################
  # Prior's means and variances and 
  # shrinkage
  ########################################

  q=c(0.1,0.25,0.5,0.75,0.9)
  s=as.matrix(res$dt[,lapply(.SD,wtd.quantile,probs=c(0.1,0.25,0.5,0.75,0.9),na.rm=TRUE,weights=w),
                     .SDcols=c("mu0","var0","rho")])
  s=rbind(s,res$dt[,lapply(.SD,wtd.mean,na.rm=TRUE,weights=w),.SDcols=c("mu0","var0","rho")])
  s=rbind(s,res$dt[,lapply(.SD,wtd.var,na.rm=TRUE,weights=w),.SDcols=c("mu0","var0","rho")])
  tab = as.matrix(s)
  rownames(tab)=c(paste0("Percentile ",c(10,25,50,75,90)),"Mean","Variance")  
  colnames(tab)=c("Prior mean","Prior variance","Shrinkage")  
  
  tab_prior = tab 

  ########################################
  # Coefficients
  ########################################
  # Prior
  mat_cdf = res$dt_sim[,.(x,prior_cdf)]
  setnames(mat_cdf,c("b","V1"))
  col_prior = c(corrected_quantiles(mat_cdf=mat_cdf,q=q),wtd.mean(res$dt$mu0,weights=res$dt$w),
                wtd.mean(res$dt$var0,weights=res$dt$w)+wtd.var(res$dt$mu0,weights=res$dt$w))
  
  # FE and Posterior means
  s=as.matrix(res$dt[,lapply(.SD,wtd.quantile,probs=c(0.1,0.25,0.5,0.75,0.9),na.rm=TRUE,weights=w),
                     .SDcols=c("coef","mu1")])
  s=rbind(s,res$dt[,lapply(.SD,wtd.mean,na.rm=TRUE,weights=w),.SDcols=c("coef","mu1")])
  s=rbind(s,res$dt[,lapply(.SD,wtd.var,na.rm=TRUE,weights=w),.SDcols=c("coef","mu1")])
  
  # Posterior 
  col_p = res$table_char[1:7,]
  
  # Together: prior, FE, P, PM
  tab = cbind(col_prior,s[,1],col_p,s[,2])
  tab = as.matrix(tab)
  rownames(tab)=c(paste0("Percentile ",c(10,25,50,75,90)),"Mean","Variance")  
  colnames(tab)=c("Prior","FE","Posterior","Posterior means")  
  
  tab_coef=tab
  
  ########################################
  # Marginal effects
  ########################################
  # Prior
  mat_cdf = res$dt_mg_sim[,.(x,prior_cdf)]
  setnames(mat_cdf,c("b","V1"))
  col_prior = c(corrected_quantiles(mat_cdf=mat_cdf,q=q),wtd.mean(res$dt_ext$mu0,weights=res$dt_ext$w),
                wtd.mean(res$dt_ext$var0,weights=res$dt_ext$w)+wtd.var(res$dt_ext$mu0,weights=res$dt_ext$w))
  
  # FE and Posterior means
  s=as.matrix(res$dt_ext[,lapply(.SD,wtd.quantile,probs=c(0.1,0.25,0.5,0.75,0.9),na.rm=TRUE,weights=w),
                         .SDcols=c("mg","mu1")])
  s=rbind(s,res$dt_ext[,lapply(.SD,wtd.mean,na.rm=TRUE,weights=w),.SDcols=c("mg","mu1")])
  s=rbind(s,res$dt_ext[,lapply(.SD,wtd.var,na.rm=TRUE,weights=w),.SDcols=c("mg","mu1")])
  
  # Posterior 
  col_p = res$table_mg[1:7,]
  
  # Together: prior, FE, P, PM
  tab = cbind(col_prior,s[,1],col_p,s[,2])
  tab = as.matrix(tab)
  rownames(tab)=c(paste0("Percentile ",c(10,25,50,75,90)),"Mean","Variance")  
  colnames(tab)=c("Prior","FE","Posterior","Posterior means")  
  
  tab_mg =tab
  
  ########################################
  # Return
  ########################################
  
  return(list(tab_prior=tab_prior,tab_coef=tab_coef,tab_mg=tab_mg))
  
}

